Skunkware 5

home *** CD-ROM | disk | FTP | other *** search

/ Skunkware 5 / Skunkware 5.iso / src / Tools / man2html-2.0.2 / man2html.pl < prev next >

Wrap

Perl Script | 1995-06-18 | 14KB | 448 lines

#! /usr/local/contrib/bin/perl
##---------------------------------------------------------------------------##
##  File:
##      man2html
##  Author:
##      Earl Hood       ehood@convex.com
##  Description:
##      man2html is a Perl program to convert formatted nroff output
##      to HTML.
##
##      Recommend command-line options based on platform:
##
##      Platform                Options
##      ---------------------------------------------------------------------
##      c2mp                    <None, the defaults should be okay>
##      hp9000s700/800          -leftm 1 -topm 8
##      sun4                    -sun
##      ---------------------------------------------------------------------
##
##---------------------------------------------------------------------------##
##  Copyright (C) 1994  Earl Hood, ehood@convex.com
##
##  This program is free software; you can redistribute it and/or modify
##  it under the terms of the GNU General Public License as published by
##  the Free Software Foundation; either version 2 of the License, or
##  (at your option) any later version.
##
##  This program is distributed in the hope that it will be useful,
##  but WITHOUT ANY WARRANTY; without even the implied warranty of
##  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
##  GNU General Public License for more details.
##
##  You should have received a copy of the GNU General Public License
##  along with this program; if not, write to the Free Software
##  Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
##---------------------------------------------------------------------------##

## Option parsing library (provides NGetOpt).  The parentheses matter:
## without them "||" binds to the filename string, which is always true,
## and the die() could never be reached.
(require 'newgetopt.pl') || die "Unable to require newgetopt.pl\n";

## Program name with any leading directory stripped; shown in the usage text.
($PROG = $0) =~ s/.*\///;
$VERSION = "2.0.2";

## Backspace character:  Used in overstriking detection
$bs = "\b";

##  Associative array of section titles and their HTML tag wrapper.
##  This list allows customization of what HTML tag is used for
##  a given section head.
##
##  The section title can be a regular expression.  Therefore, one must
##  be careful about quoting special characters.
##
##  Keys are matched against a heading line (after the left margin) as
##  regular expressions; values are the opening HTML tag to wrap it in.
%SectionHead = (
    '\S.*OPTIONS.*',            '<H2>',
    'AUTHORS?',                 '<H2>',
    'BUGS',                     '<H2>',
    'COMPATIBILITY',            '<H2>',
    'DEPENDENCIES',             '<H2>',
    'DESCRIPTION',              '<H2>',
    'DIAGNOSTICS',              '<H2>',
    'ENVIRONMENT',              '<H2>',
    'ERRORS',                   '<H2>',
    'EXAMPLES',                 '<H2>',
    'EXTERNAL INFLUENCES',      '<H2>',
    'FILES',                    '<H2>',
    'LIMITATIONS',              '<H2>',
    'NAME',                     '<H2>',
    'NOTES?',                   '<H2>',
    'OPTIONS',                  '<H2>',
    'REFERENCES',               '<H2>',
    'RETURN VALUE',             '<H2>',
    'SECTION.*:',               '<H2>',
    'SEE ALSO',                 '<H2>',
    'STANDARDS CONFORMANCE',    '<H2>',
    'STYLE CONVENTION',         '<H2>',
    'SYNOPSIS',                 '<H2>',
    'SYNTAX',                   '<H2>',
    'WARNINGS',                 '<H2>',
    '\s+Section.*:',            '<H3>',
);
$HeadFallback = '<H2>';         # Fallback tag if above is not found.

##---------------------------------------------------------------------------

##-----------##
## MAIN Body ##
##-----------##
##  Reads formatted manpage text from STDIN one page at a time (top
##  margin, text lines, bottom margin), converts each text line and
##  writes the HTML to STDOUT.
{
    &get_cli_opts();

    ## Check if processing a keyword search
    if ($K && $CGIURL) {
        &man_k();
        exit 0;
    }

    local($line,$tmp,$i,$head,$preindent,$see_also);

    $see_also = 1;      # Cross-reference linking is on until a heading turns it off
    if (!$BARE) {
        print STDOUT "<HTML>\n";
        print STDOUT "<HEAD>\n",
                     "<TITLE>$TITLE</TITLE>\n",
                     "</HEAD>\n"  if $TITLE;
        print STDOUT "<BODY>\n";
        print STDOUT "<H1>$TITLE</H1>\n",
                     "<HR>\n"  if $TITLE;
    }
    print STDOUT "<PRE>\n";

    while(!eof(STDIN)) {
        ## Discard the top margin of the page
        for ($i=0; $i < $hdsz; $i++) { <STDIN>; }

        for ($i=0; $i < $txsz; $i++) {
            $_ = <STDIN>;
            last unless defined($_);    # Input ended inside a partial page

            ## Try to check if line space is needed at page boundaries ##
            if (!$NODEPAGE && ($i==0 || $i==($txsz-1)) && !/^\s*$/) {
                /^(\s*)/;  $tmp = length($1);
                if ($do) {
                    if ($tmp < $preindent) { print STDOUT "\n"; }
                } else {
                    $do = 1;
                }
                $preindent = $tmp;
            } else {
                $do = 0;  $preindent = 0;
            }

            ## Interpret line
            $line = $_;         # Raw copy, used for heading detection below
            &entitize(*_);      # Convert [$<>] to entity references

            ## Create anchor links for manpage references, i.e. "name(1)"
            ## where any character may be overstruck
            s/((((.$bs)+)?[\+_\.\w-])+\(((.$bs)+)?\d((.$bs)+)?\w?\))/&make_xref($1)/oge
                if $CGIURL && $see_also;

            ## Emphasize underlined words
            s/((_\010[^_])+[\._]?(_\010[^_])+\)?)/&emphasize($1)/oge;

            $secth = 0;
            ## Check for strong text and headings
            if ($SUN || /.\010./o) {
                if (!$NOHEADS) {
                    $line =~ s/.\010//go;
                    $tmp = $HeadFallback;
                    foreach $head (keys %SectionHead) {
                        if ($line =~ /^$leftm$head/) {
                            $tmp = $SectionHead{$head};
                            $secth = 1;  last;
                        }
                    }
                    if ($secth || $line =~ /^$leftm\S/o) {
                        if ($CGIURL && $SEEALSO) {
                            if ($line =~ /SEE ALSO/o) { $see_also = 1; }
                            else { $see_also = 0; }
                        }
                        ## Strip only a trailing newline; chop would eat a
                        ## real character on an unterminated last line
                        $line =~ s/\n$//;
                        $_ = $tmp . $line . $tmp;
                        s%<([^>]*)>$%</$1>%;    # Turn the trailing tag into a close tag
                        $_ = "\n</PRE>\n" . $_ . "<PRE>\n";
                    } else {
                        s/(((.\010)+.)+)/&strongize($1)/oge;
                    }
                } else {
                    s/(((.\010)+.)+)/&strongize($1)/oge;
                }
            }
            print STDOUT;
        }

        ## Discard the bottom margin of the page
        for ($i=0; $i < $ftsz; $i++) { <STDIN>; }
    }
    print STDOUT "</PRE>\n",
                 "</BODY>\n",
                 "</HTML>\n"  unless $BARE;
    exit 0;
}
## End Main

##---------------------------------------------------------------------------
##	get_cli_opts() parses the command line with NGetOpt and sets the
##	global configuration variables read by the rest of the program.
##	Prints the usage message (and exits) on a parse error or -help.
##
sub get_cli_opts {
    &usage unless
    &NGetOpt(
        "botm=i",       # Number of lines for bottom margin (def: 7)
        "headmap=s",    # Filename of user section head map file
        "leftm=i",      # Character width of left margin (def: 0)
        "nodepage",     # Do not remove pagination lines
        "noheads",      # Do not detect for section heads
        "pgsize=i",     # Number of lines in a page (def: 66)
        "title=s",      # Title of manpage (def: Not defined)
        "topm=i",       # Number of lines for top margin (def: 7)
        "sun",          # Section heads are not overstriked in input
        "cgiurl=s",     # CGI URL for linking to other manpages
        "seealso",      # Link to other manpages only in the SEE ALSO section
        "k",            # Process input from 'man -k' output.
        "bare",         # Leave out HTML, HEAD, BODY tags.
        "help"          # Short usage message
    );
    &usage() if defined($opt_help);

    $pgsz = ($opt_pgsize ? $opt_pgsize : 66);
    ## The main loop tests $NODEPAGE to skip its page-boundary spacing
    ## check, so it has to be set here.
    $NODEPAGE = (defined($opt_nodepage) ? 1 : 0);
    if ($NODEPAGE) {
        $hdsz = 0;  $ftsz = 0;
    } else {
        $hdsz = (defined($opt_topm) ? $opt_topm : 7);
        $ftsz = (defined($opt_botm) ? $opt_botm : 7);
    }
    $txsz = $pgsz - ($hdsz + $ftsz);    # Text lines per page
    $leftmsz = (defined($opt_leftm) ? $opt_leftm : 0);
    $leftm = ' ' x $leftmsz;            # Left margin as a literal string

    $TITLE = ($opt_title ? $opt_title : "");
    $NOHEADS = (defined($opt_noheads) ? 1 : 0);
    $SUN = (defined($opt_sun) ? 1 : 0);
    $CGIURL = ($opt_cgiurl ? $opt_cgiurl : "");
    $SEEALSO = ($opt_seealso ? 1 : 0);
    $K = ($opt_k ? 1 : 0);
    $BARE = ($opt_bare ? 1 : 0);

    if (defined($opt_headmap)) {
        ## require dies on failure, and "require FILE || warn" binds the
        ## "||" to the filename; trap the failure so the warning is issued.
        (eval 'require $opt_headmap') ||
            warn "Unable to read $opt_headmap\n";
    }
}
##---------------------------------------------------------------------------
##	emphasize() removes the overstrike sequences (a character followed
##	by a backspace) from its argument and returns the remaining text
##	wrapped in <EM> tags.
##
sub emphasize {
    local($txt) = shift;
    $txt =~ s/.\010//go;
    $txt = "<EM>$txt</EM>";
    $txt;
}
##---------------------------------------------------------------------------
##	strongize() removes the overstrike sequences (a character followed
##	by a backspace) from its argument and returns the remaining text
##	wrapped in <STRONG> tags.
##
sub strongize {
    local($txt) = shift;
    $txt =~ s/.\010//go;
    $txt = "<STRONG>$txt</STRONG>";
    $txt;
}
##---------------------------------------------------------------------------
##	entitize() converts the HTML special characters &, < and > in a
##	line into entity references, in place.  The argument is a typeglob
##	(e.g. *_) whose scalar holds the line.
##
sub entitize {
    local(*txt) = shift;

    ## Check for special characters in overstrike text ##
    $txt =~ s/_\010\&/&strike('_', '&')/geo;
    $txt =~ s/_\010</&strike('_', '<')/geo;
    $txt =~ s/_\010>/&strike('_', '>')/geo;

    $txt =~ s/(\&\010)+\&/&strike('&', '&')/geo;
    $txt =~ s/(<\010)+</&strike('<', '<')/geo;
    $txt =~ s/(>\010)+>/&strike('>', '>')/geo;

    ## Check for special characters in regular text.  Must be careful
    ## to check before/after character in expression because it might be
    ## a special character.
    ## NOTE(review): each pattern needs one ordinary character on both
    ## sides, so a special character at the very start of the text is not
    ## matched -- confirm whether that case needs handling.
    $txt =~ s/([^\010]\&[^\010])/&htmlize2($1)/geo;
    $txt =~ s/([^\010]<[^\010])/&htmlize2($1)/geo;
    $txt =~ s/([^\010]>[^\010])/&htmlize2($1)/geo;
}
##---------------------------------------------------------------------------
##	htmlize2() is used by entitize.  It returns its argument with
##	&, < and > replaced by entity references.  "&" must be handled
##	first so the entities produced for < and > are not re-escaped.
##
sub htmlize2 {
    local($str) = shift;
    $str =~ s/&/\&amp;/g;
    $str =~ s/</\&lt;/g;
    $str =~ s/>/\&gt;/g;
    $str;
}
##---------------------------------------------------------------------------
##	strike converts HTML special characters in overstriked text
##	into entity references.  The entities are overstriked so
##	strongize() and emphasize() will recognize the entity to be
##	wrapped in <STRONG>/<EM> tags.
##
##	Arguments: the overstrike style ('_' for underline, anything else
##	for a character struck over itself) and the special character
##	('&', '<' or '>').  Returns the overstruck entity string, or an
##	undefined value (after a warning) for any other character.
##
sub strike {
    local($w, $char) = @_;
    local($ret);
    if ($w eq '_') {
        ## Underline style: every character is preceded by "_" + backspace
        if ($char eq '&') {
            $ret = "_$bs\&_${bs}a_${bs}m_${bs}p_${bs};";
        } elsif ($char eq '<') {
            $ret = "_$bs\&_${bs}l_${bs}t_${bs};";
        } elsif ($char eq '>') {
            $ret = "_$bs\&_${bs}g_${bs}t_${bs};";
        } else {
            warn qq|Unrecognized character, "$char", passed to strike()\n|;
        }
    } else {
        ## Bold style: every character is struck over by itself
        if ($char eq '&') {
            $ret = "\&$bs\&a${bs}am${bs}mp${bs}p;${bs};";
        } elsif ($char eq '<') {
            $ret = "\&$bs\&l${bs}lt${bs}t;${bs};";
        } elsif ($char eq '>') {
            $ret = "\&$bs\&g${bs}gt${bs}t;${bs};";
        } else {
            warn qq|Unrecognized character, "$char", passed to strike()\n|;
        }
    }
    $ret;
}
##---------------------------------------------------------------------------
##	make_xref() was originally added to man2html by Maurice Cinquini
##	<mauricec@tplrd.tpl.oz.au> for use in the SEE ALSO section.  The
##	code has been modified to handle more general cases, and the routine
##	is called for all manpage cross-references throughout.
##
##	Specifically, I modified it to support the user's URL template for
##	linking to other manpages, support for [+_,-] in the title name,
##	and to handle <EM> tagging.
##
##	Argument: a reference of the form "title(Ns)", possibly containing
##	overstrike sequences.  Returns the cleaned reference wrapped in a
##	<STRONG><A HREF=...> link built from the $CGIURL template, which
##	may refer to $title, $section and $subsection.
##
sub make_xref {
    local($str) = shift;
    $str =~ s/.\010//go;            # Remove overstriking
    ## The parentheses of "title(1x)" are literal and must be escaped
    local($title,$section,$subsection) =
        ($str =~ /([\+_\.\w-]+)\((\d)(\w?)\)/);
    ## NOTE(security): $CGIURL comes from the -cgiurl option and is
    ## evaluated as Perl string syntax to expand the template variables;
    ## it must only ever be supplied by a trusted party.
    local($href) = (eval "\"$CGIURL\"");
    qq|<STRONG><A HREF="$href">$str</A></STRONG>|;
}
##---------------------------------------------------------------------------
##	man_k() process a keyword search.
##
##	Reads 'man -k' output from STDIN, groups the entries by manual
##	section and prints an HTML page to STDOUT with one definition list
##	per section.  Lines that do not contain a "(Ns) -" marker are
##	skipped.
##
sub man_k {
    local($line,$refs,$section,$subsection,$desc,$i,
          %Sec1, %Sec1sub, %Sec2, %Sec2sub, %Sec3, %Sec3sub,
          %Sec4, %Sec4sub, %Sec5, %Sec5sub, %Sec6, %Sec6sub,
          %Sec7, %Sec7sub, %Sec8, %Sec8sub, %Sec9, %Sec9sub,
          %SecN, %SecNsub, %SecNsec);

    print STDOUT "<HTML>\n";
    print STDOUT "<HEAD>\n",
                 "<TITLE>$TITLE</TITLE>\n",
                 "</HEAD>\n"  if $TITLE;
    print STDOUT "<BODY>\n";
    print STDOUT "<H1>$TITLE</H1>\n",
                 "<HR>\n"  if $TITLE;

    while ($line = <STDIN>) {
        ## Entries look like "name, alias (1x) - description"; the
        ## parentheses are literal and must be escaped.
        next if $line !~ /\(\d\w?\)\s*-/;
        ($refs,$section,$subsection,$desc) =
            $line =~ /^\s*(.*)\((\d)(\w?)\)\s*-\s*(.*)$/;
        $refs =~ s/\s(and|or)\s/,/gi;   # Convert and/or to commas
        $refs =~ s/\s//g;               # Remove all whitespace
        $refs =~ s/,/, /g;              # Put space after comma
        &htmlize(*desc);                # Check for special chars in desc
        $desc =~ s/^(.)/\U$1/;          # Uppercase first letter in desc

        if ($section eq '1') {
            $Sec1{$refs} = $desc;  $Sec1sub{$refs} = $subsection;
        } elsif ($section eq '2') {
            $Sec2{$refs} = $desc;  $Sec2sub{$refs} = $subsection;
        } elsif ($section eq '3') {
            $Sec3{$refs} = $desc;  $Sec3sub{$refs} = $subsection;
        } elsif ($section eq '4') {
            $Sec4{$refs} = $desc;  $Sec4sub{$refs} = $subsection;
        } elsif ($section eq '5') {
            $Sec5{$refs} = $desc;  $Sec5sub{$refs} = $subsection;
        } elsif ($section eq '6') {
            $Sec6{$refs} = $desc;  $Sec6sub{$refs} = $subsection;
        } elsif ($section eq '7') {
            $Sec7{$refs} = $desc;  $Sec7sub{$refs} = $subsection;
        } elsif ($section eq '8') {
            $Sec8{$refs} = $desc;  $Sec8sub{$refs} = $subsection;
        } elsif ($section eq '9') {
            $Sec9{$refs} = $desc;  $Sec9sub{$refs} = $subsection;
        } else {                        # Catch all
            $SecN{$refs} = $desc;  $SecNsec{$refs} = $section;
            $SecNsub{$refs} = $subsection;
        }
    }
    &print_mank_sec(*Sec1, 1, *Sec1sub);
    &print_mank_sec(*Sec2, 2, *Sec2sub);
    &print_mank_sec(*Sec3, 3, *Sec3sub);
    &print_mank_sec(*Sec4, 4, *Sec4sub);
    &print_mank_sec(*Sec5, 5, *Sec5sub);
    &print_mank_sec(*Sec6, 6, *Sec6sub);
    &print_mank_sec(*Sec7, 7, *Sec7sub);
    &print_mank_sec(*Sec8, 8, *Sec8sub);
    &print_mank_sec(*Sec9, 9, *Sec9sub);
    &print_mank_sec(*SecN, 'N', *SecNsub, *SecNsec);

    ## Every <DL> is opened and closed inside print_mank_sec(), so no
    ## closing </DL> belongs here.
    print STDOUT "</BODY>\n",
                 "</HTML>\n";
}
##---------------------------------------------------------------------------
##	print_mank_sec() prints out manpage cross-refs of a specific section.
##
##	Arguments (hashes are passed as typeglobs):
##	    *sec     refs string => description
##	    $sect    section label for the heading; 'N' is the catch-all
##	    *secsub  refs string => subsection letter
##	    *secsec  refs string => real section (only used when $sect is 'N')
##	Prints nothing when the section has no entries.
##
sub print_mank_sec {
    local(*sec, $sect, *secsub, *secsec) = @_;
    local(@array, @refs, $href, $item, $title, $subsection, $i, $section);
    $section = $sect;

    @array = sort keys %sec;
    if ($#array >= 0) {
        print STDOUT "<H2>Section $section</H2>\n",
                     "<DL>\n";
        foreach $item (@array) {
            $section = $secsec{$item}  if $sect eq 'N';
            ## Entries are stored as "a, b"; split on the comma and any
            ## following blanks so the link text has no leading space.
            @refs = split(/,\s*/,$item);
            $title = $refs[0];  $title =~ s/\(\)//g;  # Watch out for extra ()'s
            $subsection = $secsub{$item};
            ## NOTE(security): $CGIURL comes from the -cgiurl option and
            ## is evaluated as Perl string syntax to expand $title,
            ## $section and $subsection; it must be trusted input.
            $href = eval "\"$CGIURL\"";     # Create HREF string
            print STDOUT "<DT>\n";
            $i = 0;
            foreach (@refs) {
                print STDOUT qq|<A HREF="$href">$_</A>|;
                print STDOUT ", "  if $i < $#refs;
                $i++;
            }
            print STDOUT " ($section$subsection)\n",
                         "<DD>\n",
                         $sec{$item}, "\n";
        }
        print STDOUT "</DL>\n";
    }
}
##---------------------------------------------------------------------------
##	htmlize() replaces &, < and > with entity references, in place.
##	The argument is a typeglob (e.g. *desc) whose scalar holds the
##	text; the converted text is also returned.  "&" is handled first
##	so the entities produced for < and > are not re-escaped.
##
sub htmlize {
    local(*str) = shift;
    $str =~ s/&/\&amp;/g;
    $str =~ s/</\&lt;/g;
    $str =~ s/>/\&gt;/g;
    $str;
}
##---------------------------------------------------------------------------
##	usage() prints the option summary to STDOUT and exits with status 0.
##
sub usage {
    print STDOUT <<EndOfUsage;
Usage: $PROG [ options ] < infile > outfile
Options:
  -bare            : Do not put in HTML, HEAD, BODY tags
  -botm <#>        : Number of lines for bottom margin (def: 7)
  -cgiurl <url>    : URL for linking to other manpages
  -headmap <file>  : Filename of user section head map file
  -help            : This message
  -k               : Process a keyword search result
  -leftm <#>       : Character width of left margin (def: 0)
  -nodepage        : Do not remove pagination lines
  -noheads         : Do not detect for section heads
  -pgsize <#>      : Number of lines in a page (def: 66)
  -seealso         : Link to other manpages only in the SEE ALSO section
  -sun             : Section heads are not overstriked in input
  -title <string>  : Title of manpage (def: Not defined)
  -topm <#>        : Number of line for top margin (def: 7)

Description:
  $PROG takes formatted manpages from STDIN and converts it to HTML sent
  to STDOUT.  The -topm and -botm arguments are the number of lines to the
  main body text and NOT to the running headers/footers.

Version:
  $VERSION

EndOfUsage
    exit 0;
}